// Generates summary statistics and variation plots
// Author Levi Marks levi.marks.1>at>gmail.com

global DATA "[Insert Path Here]"
set more off
graph drop _all

// 0 // Mark trimmed sample

use "$DATA/ghgrp_di_intermediate", clear
qui keep if trim_5 != 1
qui keep facility_id year
qui gen quality = 1
qui compress*
qui save "$DATA/Temp/trimmed_sample", replace

// 1 // Summary statistics

use "$DATA/ghgrp_di", clear
qui merge m:1 facility_id year using "$DATA/Temp/trimmed_sample"
qui drop _m

qui replace ch4_comp = ch4_comp + ch4_testing

// Convert units
foreach var in ch4_all ch4_normal ch4_comp ch4_assoc ch4_combust gas prod_mcf oil {
	qui replace `var' = `var'/1000
}

tabstat rate, stat(mean sd) format(%9.4fc) col(stat)
tabstat rate if quality == 1, stat(mean sd) format(%9.4fc) col(stat)

tabstat ch4_all ch4_normal ch4_comp ch4_assoc ch4_combust gas oil wells completions, stat(mean sd) format(%9.0fc) col(stat)
tabstat ch4_all ch4_normal ch4_comp ch4_assoc ch4_combust gas oil wells completions if quality == 1, stat(mean sd) format(%9.0fc) col(stat)

tabstat p_spot, stat(mean sd N) format(%9.2fc) col(stat)
tabstat p_spot if quality == 1, stat(mean sd N) format(%9.2fc) col(stat)

// Number of facilities
preserve
qui collapse (count) p_spot, by(facility_id)
tabstat p_spot, stat(N)
restore

preserve
qui keep if quality == 1
qui collapse (count) p_spot, by(facility_id)
tabstat p_spot, stat(N)
restore

// Variation across facilities within a year and within facilities over time
qui keep if quality == 1
preserve
qui collapse (sd) rate, by(year)
tabstat rate, stat(mean)
restore
preserve
qui collapse (sd) rate, by(facility)
tabstat rate, stat(mean)
restore

// 2 // Graph of distribution of ratio of GHGRP production (prod_mcf) to DI production (gas)

use "$DATA/ghgrp_di", clear
gen prod_ratio = prod_mcf/gas if year >= 2015
bysort facility_id: egen mean_prod_ratio = mean(prod_ratio)
replace mean_prod_ratio = 2 if mean_prod_ratio > 2 & !missing(mean_prod_ratio)

twoway hist mean_prod_ratio, color(gs12) bin(25) lcolor(gs8) ///
	graphregion(color(white)) xsize(6) ysize(4) ///
	xtitle("Ratio of GHGRP Production to DI Production", margin(0 0 0 2) size(medlarge)) /// 
	xlabel(,labsize(medlarge)) ///
	ytitle("Density", margin(0 2 0 0) size(medlarge)) ylabel("") ///
	xline(.8, lpattern(dash) lcolor(gs8)) ///
	xline(1.25, lpattern(dash) lcolor(gs8)) ///
	name(prod_ratio)
graph export "$DATA/Results/p8_prod_ratio.pdf", replace





// 3 // Graphs showing variation in prices and rates

// 3.1 // Variation in prices

use "$DATA/ghgrp_di_intermediate", clear
drop if trim_5 == 1

rename region region_encoded
decode region_encoded, gen(region)

// Density

twoway hist p_spot, bin(18) fcolor(gs12) lcolor(gs9) ///
	graphregion(color(white) margin(2 4 2 2)) xsize(6) ysize(4) ///
	xlabel(1 "1.00" 2 "2.00" 3 "3.00" 4 "4.00" 5 "5.00" 6 "6.00", labsize(large) nogrid labgap(2)) ///
	ylabel(, tlstyle(none) labsize(large) nolabel nogrid )  ///
	xtitle("Price ($/Mcf)", size(vlarge)  margin(0 0 0 2)) ///
	ytitle("Density", margin(11 4 0 0) size(vlarge)) ///
	name(p_density) saving("$DATA/Graphs/p_density", replace)

// Variation over time

levelsof facility_id, local(facilities)
foreach id in `facilities' {
		local call `call' line p_spot year if facility == `id', color(gs10) lwidth(vvthin) ||
}
bysort region year: egen region_p = mean(p_spot)

sort facility_id year
twoway `call' ///
	connected region_p year if region == "Mountain" & facility == 1007481, color(midgreen) lwidth(medthick) msymbol(circle) msize(medlarge) || ///
	connected region_p year if region == "South Central" & facility == 1006884, color(navy) lwidth(medthick) msymbol(diamond) || ///
	connected region_p year if region == "East" & facility == 1008051, color(edkblue) lwidth(medthick) msymbol(square)  || ///
	connected region_p year if region == "Midwest" & facility == 1008299, color(dkgreen) lwidth(medthick) msymbol(triangle) || ///
	connected region_p year if region == "Pacific" & facility == 1008405, color(ebblue) lwidth(medthick) msymbol(diamond)  ///
	graphregion(color(white)) xsize(6) ysize(4) ///
	xtitle("") /// 
	xlabel(,labsize(large)) ///
	ytitle("Price ($/Mcf)", margin(0 3 0 0) size(vlarge)) ///
	ylabel(0 "0" 1 "1.00" 2 "2.00" 3 "3.00" 4 "4.00" 5 "5.00" 6 "6.00", ang(hor) labsize(large)) ///
	legend(order(214 "Mountain"  216 "East" 217 "Midwest" 218 "Pacific" 215 "South" "Central" 213 "Individual" "Facility") size(medlarge) colgap(4) rowgap(2) keygap(2) symxsize(*.5) col(2) pos(8) ring(0) bmargin(3 3 3 3) region(lwidth(thin) lcolor(gs4) margin(2 2 1 1)) ) ///
	name(prices_1) saving("$DATA/Graphs/prices_1", replace)


// Residual variation over time

reg p_spot i.region_year i.facility_id
predict p_res, res

local call // Resetting lines local

bysort region: gen int rn_id = round(runiform()*8 + 4) // Random id variable for color
bysort facility_id: replace rn_id = rn_id[1]
foreach id in `facilities' {
		qui sum rn_id if facility == `id'
		local grey `r(mean)'
		local call `call' line p_res year if facility == `id', ///
		color(gs`grey') lwidth(vvthin) ||
}

sort facility_id year
twoway `call' ///
	line p_res year if facility == 0, ///
	graphregion(color(white)) xsize(6) ysize(4) ///
	xtitle("") /// 
	xlabel(,labsize(large)) ///
	ytitle("Price ($/Mcf)", margin(0 1 0 0) size(vlarge)) ///
	ylabel(-.75 "-0.75" -.5 "-0.50" -.25 "-0.25" 0 "0" .25 "0.25" .5 "0.50" .75 "0.75" 1 "1.00" 1.25 "1.25" , ang(hor) labsize(large)) ///
	legend(off) ///
	name(prices_2) saving("$DATA/Graphs/prices_2", replace)


// 3.2 // Variation in rates

use "$DATA/ghgrp_di_intermediate", clear
drop if trim_5 == 1

rename region region_encoded
decode region_encoded, gen(region)

// Density

gen rate_trunc = rate
replace rate_trunc = .08 if rate_trunc > .08

twoway hist rate_trunc, bin(25) fcolor(gs12) lcolor(gs9) ///
	graphregion(color(white) margin(2 4 2 2)) xsize(6) ysize(4) ///
	xlabel(0 "0%" .02 "2%" .04 "4%" .06 "6%" .08 "8%", labsize(large) nogrid labgap(2)) ///
	ylabel(, tlstyle(none) labsize(large) nolabel nogrid )  ///
	xtitle("Emission Rate", size(vlarge)  margin(0 0 0 3)) ///
	ytitle("Density", margin(8 4 0 0) size(vlarge)) ///
	name(r_density) saving("$DATA/Graphs/r_density", replace)

// Variation over time

levelsof facility_id, local(facilities)
local call // Resetting lines local
foreach id in `facilities' {
		local call `call' line rate_trunc year if facility == `id', color(gs11) lwidth(vvthin) ||
}
bysort region year: egen region_r = mean(rate)
bysort year: egen mean_r = mean(rate)

sort facility_id year
twoway `call' ///
	line mean_r year if facility == 1007481, color(gs8) lwidth(vvthick) || ///
	connected region_r year if region == "Mountain" & facility == 1007481, color(midgreen) lwidth(medthick) msymbol(circle) msize(medlarge) || ///
	connected region_r year if region == "South Central" & facility == 1006884, color(navy) lwidth(medthick) msymbol(diamond) || ///
	connected region_r year if region == "East" & facility == 1008051, color(edkblue) lwidth(medthick) msymbol(square)  || ///
	connected region_r year if region == "Midwest" & facility == 1008299, color(dkgreen) lwidth(medthick) msymbol(triangle) || ///
	connected region_r year if region == "Pacific" & facility == 1008405, color(ebblue) lwidth(medthick) msymbol(diamond)  ///
	graphregion(color(white)) xsize(6) ysize(4) ///
	xtitle("") /// 
	xlabel(,labsize(large)) ///
	ytitle("Emission Rate", margin(0 3 0 0) size(vlarge)) ///
	ylabel(0 "0%" .02 "2%" .04 "4%" .06 "6%" .08 "8%", ang(hor) labsize(large)) ///
	legend(order(215 "Mountain"  217 "East" 218 "Midwest" 219 "Pacific" 216 "South" "Central" 213 "Individual" "Facility" 214 "Overall" "Average") size(medlarge) colgap(4) rowgap(2) keygap(2)  symxsize(*.5) col(2) pos(2) ring(0) bmargin(4 4 4 4) region(lwidth(thin) lcolor(gs4) margin(2 2 1 1)) ) ///
	name(rates_1) saving("$DATA/Graphs/rates_1", replace)
	

// Residual variation over time

reg rate i.region_year i.facility_id
predict r_res, res

local call // Resetting lines local

bysort region: gen int rn_id = round(runiform()*8 + 4) // Random id variable for color
bysort facility_id: replace rn_id = rn_id[1]
foreach id in `facilities' {
		qui sum rn_id if facility == `id'
		local grey `r(mean)'
		local call `call' line r_res year if facility == `id', ///
		color(gs`grey') lwidth(vvthin) ||
}

sort facility_id year
twoway `call' ///
	line r_res year if facility == 0, ///
	graphregion(color(white)) xsize(6) ysize(4) ///
	xtitle("") /// 
	xlabel(,labsize(large)) ///
	ytitle("Emission Rate", margin(0 1 0 0) size(vlarge)) ///
	ylabel(-.04 "-4%" -.02 "-2%" 0 "0%" .02 "2%" .04 "4%" .06 "6%", ang(hor) labsize(large)) ///
	legend(off) ///
	name(rates_2) saving("$DATA/Graphs/rates_2", replace)

	

// 3.3 // Combining	
	
graph combine "$DATA/Graphs/p_density.gph" "$DATA/Graphs/r_density.gph" , ///
	col(2) iscale(1) imargin(3 5 0 0) ///
	graphregion(color(white)) ///
	xsize(12) ysize(4) ///
	title("Raw Data", size(vlarge) margin(0 0 4 0)) ///
	saving("$DATA/Graphs/variation_0",replace)
	
graph export "$DATA/Results/variation_0.pdf", replace


graph combine "$DATA/Graphs/prices_1.gph" "$DATA/Graphs/rates_1.gph" , ///
	col(2) iscale(1) imargin(3 5 0 0) ///
	graphregion(color(white)) ///
	xsize(12) ysize(4) ///
	title("Variation Over Time and Regional Averages", size(vlarge) margin(0 0 4 0)) ///
	saving("$DATA/Graphs/variation_1",replace)
	
graph export "$DATA/Results/variation_1.pdf", replace


graph combine "$DATA/Graphs/prices_2.gph" "$DATA/Graphs/rates_2.gph" , ///
	col(2) iscale(1) imargin(3 5 0 0) ///
	graphregion(color(white)) ///
	xsize(12) ysize(4) ///
	title("Residual Variation Conditional on Facility and Region-by-Year Fixed Effects", size(vlarge) margin(0 0 4 0)) ///
	saving("$DATA/Graphs/variation_2",replace)
	
graph export "$DATA/Results/variation_2.pdf", replace



